In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
In [2]:
#pip install -U kaleido #for saving figures into png, jpeg format
In [3]:
data = pd.read_excel("../Data/socotecDB.xls")
data.head()
WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero
Out[3]:
ID BILL_ID SUPPLIER OFFICE UTILITY_LOCATION UTILITY_CITY UTILITY_REGION SUPPLY_TYPE MONTHLY_F1 MONTHLY_F2 MONTHLY_F3 MC MONTHLY_SMC MONTH COST_Kw_h COST_MC COST_SMC MONTHLY_BILL_AMOUNT RENEWABLE_SOURCE_PERCENTUAL NOTES
0 1 7026491105 A2A ENERGIA SPA ALTRO VIA EDISON TOMMASO 045 - 20026 NOVATE MILANESE MI MI LOMBARDIA METANO 0.0 0.0 0.0 0.0 336.78 GENNAIO 0.0 0.0 0.65 218.0 0.0 0
1 2 7026491105 A2A ENERGIA SPA ALTRO VIA EDISON TOMMASO 045 - 20026 NOVATE MILANESE MI MI LOMBARDIA METANO 0.0 0.0 0.0 0.0 202.47 FEBBRAIO 0.0 0.0 0.66 134.0 0.0 0
2 3 7026491105 A2A ENERGIA SPA ALTRO VIA EDISON TOMMASO 045 - 20026 NOVATE MILANESE MI MI LOMBARDIA METANO NaN NaN NaN 0.0 132.31 MARZO 0.0 0.0 0.81 107.0 0.0 0
3 4 7026491105 A2A ENERGIA SPA ALTRO VIA EDISON TOMMASO 045 - 20026 NOVATE MILANESE MI MI LOMBARDIA METANO 0.0 0.0 0.0 0.0 71.16 APRILE 0.0 0.0 0.82 58.0 0.0 0
4 5 7026491105 A2A ENERGIA SPA ALTRO VIA EDISON TOMMASO 045 - 20026 NOVATE MILANESE MI MI LOMBARDIA METANO 0.0 0.0 0.0 0.0 23.05 MAGGIO 0.0 0.0 1.47 34.0 0.0 0
In [4]:
data.shape
Out[4]:
(578, 20)
In [5]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 578 entries, 0 to 577
Data columns (total 20 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   ID                           578 non-null    int64  
 1   BILL_ID                      578 non-null    object 
 2   SUPPLIER                     578 non-null    object 
 3   OFFICE                       578 non-null    object 
 4   UTILITY_LOCATION             578 non-null    object 
 5   UTILITY_CITY                 578 non-null    object 
 6   UTILITY_REGION               578 non-null    object 
 7   SUPPLY_TYPE                  577 non-null    object 
 8   MONTHLY_F1                   576 non-null    float64
 9   MONTHLY_F2                   576 non-null    float64
 10  MONTHLY_F3                   575 non-null    float64
 11  MC                           573 non-null    float64
 12  MONTHLY_SMC                  578 non-null    float64
 13  MONTH                        578 non-null    object 
 14  COST_Kw_h                    576 non-null    float64
 15  COST_MC                      571 non-null    float64
 16  COST_SMC                     577 non-null    float64
 17  MONTHLY_BILL_AMOUNT          576 non-null    float64
 18  RENEWABLE_SOURCE_PERCENTUAL  567 non-null    float64
 19  NOTES                        575 non-null    object 
dtypes: float64(10), int64(1), object(9)
memory usage: 90.4+ KB
In [6]:
data.isna().any()
Out[6]:
ID                             False
BILL_ID                        False
SUPPLIER                       False
OFFICE                         False
UTILITY_LOCATION               False
UTILITY_CITY                   False
UTILITY_REGION                 False
SUPPLY_TYPE                     True
MONTHLY_F1                      True
MONTHLY_F2                      True
MONTHLY_F3                      True
MC                              True
MONTHLY_SMC                    False
MONTH                          False
COST_Kw_h                       True
COST_MC                         True
COST_SMC                        True
MONTHLY_BILL_AMOUNT             True
RENEWABLE_SOURCE_PERCENTUAL     True
NOTES                           True
dtype: bool
In [7]:
data["MONTH"].unique()
Out[7]:
array(['GENNAIO', 'FEBBRAIO', 'MARZO', 'APRILE', 'MAGGIO', 'GIUGNO',
       'LUGLIO', 'OTTOBRE', 'NOVEMBRE', 'DICEMBRE', 'SETTEMBRE', 'AGOSTO'],
      dtype=object)
In [8]:
#Datetime correction = rename all the month and change the type of this column
data["MONTH"] = data["MONTH"].replace({"GENNAIO":"January",
                         "FEBBRAIO": "February",
                         "MARZO": "March",
                         "APRILE": "April",
                         "MAGGIO": "May",
                         "GIUGNO": "June",
                         "LUGLIO": "July",
                         "AGOSTO": "August",
                         "SETTEMBRE": "September",
                         "OTTOBRE": "October",
                         "NOVEMBRE": "November",
                         "DICEMBRE": "December"})
In [9]:
data["MONTH"] = pd.to_datetime("2021-" + data["MONTH"],format="%Y-%B")
In [10]:
data.info() #Check the type of [MONTH]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 578 entries, 0 to 577
Data columns (total 20 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   ID                           578 non-null    int64         
 1   BILL_ID                      578 non-null    object        
 2   SUPPLIER                     578 non-null    object        
 3   OFFICE                       578 non-null    object        
 4   UTILITY_LOCATION             578 non-null    object        
 5   UTILITY_CITY                 578 non-null    object        
 6   UTILITY_REGION               578 non-null    object        
 7   SUPPLY_TYPE                  577 non-null    object        
 8   MONTHLY_F1                   576 non-null    float64       
 9   MONTHLY_F2                   576 non-null    float64       
 10  MONTHLY_F3                   575 non-null    float64       
 11  MC                           573 non-null    float64       
 12  MONTHLY_SMC                  578 non-null    float64       
 13  MONTH                        578 non-null    datetime64[ns]
 14  COST_Kw_h                    576 non-null    float64       
 15  COST_MC                      571 non-null    float64       
 16  COST_SMC                     577 non-null    float64       
 17  MONTHLY_BILL_AMOUNT          576 non-null    float64       
 18  RENEWABLE_SOURCE_PERCENTUAL  567 non-null    float64       
 19  NOTES                        575 non-null    object        
dtypes: datetime64[ns](1), float64(10), int64(1), object(8)
memory usage: 90.4+ KB
In [11]:
#Create a graph depending of timeline
monthly_sms = data.groupby(["MONTH"])["MONTHLY_SMC"].sum()
tim1 = px.bar(x=monthly_sms.index, y=monthly_sms.values)
tim1.update_layout(xaxis_tickformat="%b")
tim1.update_xaxes(title_text="Month", nticks=12)
tim1.update_yaxes(title_text="Monthly consumption of CO2, m3" )
tim1.show()
#tim1.write_image("offices_timebase1.jpeg")
tim1.write_image("offices_timebase1.png")
In [12]:
#NAN values count in a montly consumption
MONTHLY_F1_nan = data["MONTHLY_F1"].isnull().sum()
MONTHLY_F1_nan
Out[12]:
2
In [13]:
MC_nan = data["MC"].isnull().sum()
MC_nan
Out[13]:
5
In [14]:
MONTHLY_SMC_nan = data["MONTHLY_SMC"].isnull().sum()
MONTHLY_SMC_nan
Out[14]:
0
In [15]:
#RENEWABLE_SOURCE_PERCENTUAL analysis
data[data["RENEWABLE_SOURCE_PERCENTUAL"] > 0].count()
Out[15]:
ID                             12
BILL_ID                        12
SUPPLIER                       12
OFFICE                         12
UTILITY_LOCATION               12
UTILITY_CITY                   12
UTILITY_REGION                 12
SUPPLY_TYPE                    12
MONTHLY_F1                     12
MONTHLY_F2                     12
MONTHLY_F3                     12
MC                             12
MONTHLY_SMC                    12
MONTH                          12
COST_Kw_h                      12
COST_MC                        12
COST_SMC                       12
MONTHLY_BILL_AMOUNT            12
RENEWABLE_SOURCE_PERCENTUAL    12
NOTES                          12
dtype: int64
In [16]:
data["OFFICE"].unique()
Out[16]:
array(['ALTRO', 'MILANO', 'GENOVA', 'AVELLINO', 'SENIGALLIA', 'FERRARA',
       'ORTONA'], dtype=object)
In [17]:
data["RENEWABLE_SOURCE_PERCENTUAL"].unique()
#Basically only 12 resourses where using renewable source of energy
Out[17]:
array([0.  , 0.45,  nan, 1.  ])
In [18]:
ren1 = px.pie(data, values="RENEWABLE_SOURCE_PERCENTUAL", names="OFFICE", title="Renewable sources percentage")
ren1.show()
ren1.write_image("office_renewable.png")
In [19]:
#SUPPLIER and SUPPLY_TYPE analysis
data["SUPPLIER"].unique()
Out[19]:
array(['A2A ENERGIA SPA', 'STADTISCHES ELEKTRIZITATSWERK STERZING',
       'GREEN NETWORK SPA', 'AGN/GPL', 'ASCOTRADE', 'AXPO ITALIA S.p.A.',
       'DUFERCO ENERGIA SPA', 'ENI GAS E LUCE SPA', 'GOLDENERGY s.r.l.',
       'HERA S.p.A.', 'FORNITORE', 'IREN MERCATO S.p.A.', 'SIDIGAS SRL',
       'SICME ENERGY E GAS SRL',
       'SERVIZIO ELETTRICO NAZIONALE - SERVIZIO DI MAGGIOR TUTELA',
       'ELETTRAGAS S.R.L.', '#NAME?', 'COBEGAS SRL',
       'ROMA GAS & POWER SRL', 'ZELO'], dtype=object)
In [20]:
data["SUPPLY_TYPE"].unique()
Out[20]:
array(['METANO', 'ENERGIA', 'GPL', 'ACQUA', nan, 'ALTRO GAS'],
      dtype=object)
In [21]:
#s.rename({1: 3, 2: 5})
#supply = pd.Series(data["SUPPLY_TYPE"])
data["SUPPLY_TYPE"] = data["SUPPLY_TYPE"].replace({"METANO": "METHANE", "ENERGIA": "ENERGY", "GPL": "LPG", 
                "ACQUA": "WATER", "ALTRO GAS": "OTHER GAS"})
In [22]:
data["SUPPLY_TYPE"].value_counts() #GPL = Liquefied petroleum gas 
Out[22]:
ENERGY       301
METHANE      214
WATER         35
LPG           15
OTHER GAS     12
Name: SUPPLY_TYPE, dtype: int64
In [23]:
#Group the offices, supplier, supply_type and count how many of them are there
group_sup = data.groupby(["OFFICE", "SUPPLY_TYPE", "SUPPLIER"], as_index=False).count()
group_sup.head()
Out[23]:
OFFICE SUPPLY_TYPE SUPPLIER ID BILL_ID UTILITY_LOCATION UTILITY_CITY UTILITY_REGION MONTHLY_F1 MONTHLY_F2 MONTHLY_F3 MC MONTHLY_SMC MONTH COST_Kw_h COST_MC COST_SMC MONTHLY_BILL_AMOUNT RENEWABLE_SOURCE_PERCENTUAL NOTES
0 ALTRO ENERGY #NAME? 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
1 ALTRO ENERGY A2A ENERGIA SPA 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21
2 ALTRO ENERGY AXPO ITALIA S.p.A. 43 43 43 43 43 43 43 42 43 43 43 43 43 43 43 43 43
3 ALTRO ENERGY DUFERCO ENERGIA SPA 62 62 62 62 62 62 62 62 58 62 62 62 62 62 61 58 59
4 ALTRO ENERGY GOLDENERGY s.r.l. 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
In [24]:
#Visualisation of each supplier
sup1 = px.sunburst(group_sup, path=["OFFICE", "SUPPLY_TYPE", "SUPPLIER"], values=group_sup["ID"])
sup1.show()
sup1.write_image("office_sunburst_supplier.png")
/Users/olhafedyshyn/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/olhafedyshyn/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/olhafedyshyn/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [25]:
#Sum the data by offices
data_group3 = data.groupby(by=["OFFICE"]).sum()
data_group3
Out[25]:
ID MONTHLY_F1 MONTHLY_F2 MONTHLY_F3 MC MONTHLY_SMC COST_Kw_h COST_MC COST_SMC MONTHLY_BILL_AMOUNT RENEWABLE_SOURCE_PERCENTUAL
OFFICE
ALTRO 90763 16329.88 14140.81 17567.54 4.0 6951.90 244.02 0.00 276.74 27128.03 1.00
AVELLINO 12987 67235.00 15846.00 24098.00 711.5 0.00 4.80 19.75 0.00 32805.61 0.00
FERRARA 36773 163033.23 55019.79 97396.95 3611.0 29575.00 7.10 0.00 15.28 117034.19 0.00
GENOVA 5145 13683.00 4881.00 7472.00 0.0 2119.00 5.55 0.00 67.01 13434.99 4.50
MILANO 1257 148620.00 45667.00 71554.00 0.0 28867.00 2.20 0.00 9.36 70456.00 0.45
ORTONA 11928 93896.60 33802.78 60093.82 0.0 766.91 0.00 0.00 3.90 801.25 0.00
SENIGALLIA 8478 8281.94 4464.47 7755.29 0.0 6006.43 4.98 0.00 13.86 13271.06 0.00
In [26]:
fig5 = go.Figure(data=[
    go.Bar(name="MONTLY_SMC", x=data_group3.index, y=data_group3["MONTHLY_SMC"], yaxis="y", offsetgroup=1),
    go.Bar(name="COST_SMC", x=data_group3.index, y=data_group3["COST_SMC"], yaxis="y2", offsetgroup=2)],
                layout={
        'yaxis': {'title': 'Consumption of gas, m3'},
        'yaxis2': {'title': 'Cost, € per unit', 'overlaying': 'y', 'side': 'right'}
    })

fig5.update_layout(barmode="group", xaxis_title="Office")
fig5.show()
In [27]:
fig8 = go.Figure(data=[
    go.Bar(name="MC", x=data_group3.index, y=data_group3["MC"], yaxis="y", offsetgroup=1),
    go.Bar(name="COST_MC", x=data_group3.index, y=data_group3["COST_MC"], yaxis="y2", offsetgroup=2)],
                layout={
        'yaxis': {'title': 'Consumption of water, m3'},
        'yaxis2': {'title': 'Cost, € per unit', 'overlaying': 'y', 'side': 'right'}
    })

fig8.update_layout(barmode="group", xaxis_title="Office")
fig8.show()
In [28]:
fig7 = go.Figure(data=[
                go.Bar(name="MONTHLY_F1", x=data_group3.index, y=data_group3["MONTHLY_F1"]),
                go.Bar(name="MONTHLY_F2", x=data_group3.index, y=data_group3["MONTHLY_F2"]),
                go.Bar(name="MONTHLY_F3", x=data_group3.index, y=data_group3["MONTHLY_F3"])])
       
fig7.update_layout(barmode="group", xaxis_title="Office", yaxis_title=f"Electricity consumption <br> in different part of the month, Kw/h")
fig7.show()
fig7.write_image("office_monthly_consumption.png")
In [ ]: